***********************************************************************************
* Merging NEW-OWES-ONET-SOC-CEN2010-IPUMS with CPS data
* last modified 7/16/2025
*
* Note: Need to aggregate SOC in NEW-OWES-ONET-SOC-CEN2010-IPUMS into CEN2010 in CPS
*
***********************************************************************************


* Session setup: close any log left open by an earlier run, start with an
* empty dataset in memory, and turn off output paging.
capture log close
clear
set more off

***************
* Directories *
***************

* Every path below is relative to this working directory.
cd "/Users/"

* Code and raw-data locations
global dofile        "./Code/NEW-OEWS"
global matrix_data   "./Data/National Employment Matrix"
global oews_data     "./Data/OEWS/all_excel"
global onet_data     "./OccLink/ONET/ONET data/merged"
global cps_data      "./Data/CPS/IPUMS/00015"
global xwalk         "./Data/Crosswalk"

* Output and scratch locations
global results       "./Results"
global temp          "./Temp"



********************************
* Aggregate SOC into Cen2010   *
********************************
* Scratch file that holds the SOC-level rows together with the
* ipums2010-by-year aggregates computed below.
tempfile occ_projection

* Load the merged SOC-level file. "clear" is the documented option of -use-
* for discarding the data currently in memory.
use "${temp}/oews_matrix_ooh_onet2000-2020_soc_ipums.dta", clear

keep soc cen2010 ipums2010 year baseempsize proj pc_chng a_mean a_pct10 a_pct90 jobopening soctitle ipums_title

* Rows with no IPUMS 2010 code cannot be aggregated to an ipums2010 cell.
drop if ipums2010 == ""


* aggregate occ vars from OOH, OEWS and ONET
* Employment and projected employment: totals within ipums2010 x year.
* -egen, total()- is the current name of the older sum() function; like sum(),
* it treats missing values as zero.
bys ipums2010 year: egen empsize = total(baseempsize)
bys ipums2010 year: egen projsize = total(proj)

* Growth rate and wage measures: base-employment-weighted means within
* ipums2010 x year (asgen is a community-contributed command and must be installed).
bys ipums2010 year: asgen proj_pc_growth = pc_chng, w(baseempsize)
bys ipums2010 year: asgen annual_mean = a_mean, w(baseempsize)
bys ipums2010 year: asgen annual_pct10 = a_pct10, w(baseempsize)
bys ipums2010 year: asgen annual_pct90 = a_pct90, w(baseempsize)

* ipums2010 is a string up to this point (see the == "" test above); convert
* it to numeric. NOTE(review): destring leaves the variable as a string if any
* value is non-numeric -- confirm the conversion succeeds.
destring ipums2010, replace

save "`occ_projection'", replace


* generate biennial occ change
* Builds one row per ipums2010 x year with the two-year change in aggregated
* employment, then merges that change back onto the full occupation file.
tempfile occ_change_rate

use "`occ_projection'", clear

drop if year == 2019 /* drop 2019 so the remaining years are two years apart */

keep ipums2010 year empsize

* empsize is constant within ipums2010 x year, so keep one row per cell.
duplicates drop ipums2010 year, force

* Employment two years earlier. The lag is taken only when the preceding row
* for the same occupation really is year - 2; if a year is missing for an
* occupation the lag (and hence the change rate) is left missing instead of
* silently spanning a longer gap.
bys ipums2010 (year): gen empsize_minus2 = empsize[_n-1] if year[_n-1] == year - 2

gen emp_chng_rate = (empsize - empsize_minus2)/empsize_minus2

* impute 2000 change to be the same as 2002 change as 1998 data are missing
* (soc not harmonized). Done within ipums2010 and only when the next row is
* that occupation's 2002 row, so the value can never be taken from a
* different occupation or a different year.
bys ipums2010 (year): replace emp_chng_rate = emp_chng_rate[_n+1] if year == 2000 & year[_n+1] == 2002

gen emp_pc_growth = emp_chng_rate * 100 /* convert rate to percentages */

save "`occ_change_rate'", replace

use "`occ_projection'", clear

* Many SOC-level rows per ipums2010 x year in memory, one row per cell in the
* change file. Rows for 2019 have no match because 2019 was dropped above.
merge m:1 ipums2010 year using "`occ_change_rate'"

save "${temp}/oews_matrix_ooh2000-2020_ipums_occ.dta", replace


*************************************************
* Generate an OOH variable based on IPUMS2010   *
*************************************************

use "${temp}/oews_matrix_ooh2000-2020_ipums_occ.dta", clear

* outlook codes the projected growth rate (proj_pc_growth) into bands whose
* cutpoints differ by year. Codes as grouped by the recode below:
*   1-3 = growth, 4-5 = stable, 6-8 = decline
*
* Bands are half-open intervals on the cutpoints, so every non-missing value
* falls in exactly one band. proj_pc_growth is a weighted mean and is not
* rounded to two decimals, so bounds written as x.99 would leave values such
* as 20.995 unclassified.
*   - at a non-negative cutpoint the boundary value belongs to the upper band
*   - at a negative cutpoint the boundary value belongs to the lower band
* Stata treats missing as larger than any number, so each top band also
* requires a non-missing growth rate; missing growth leaves outlook missing.

gen outlook = .

/* 2000 and 2002 (same cutpoints) */
replace outlook = 1 if inlist(year, 2000, 2002) & proj_pc_growth >= 36 & !missing(proj_pc_growth)
replace outlook = 2 if inlist(year, 2000, 2002) & proj_pc_growth >= 21 & proj_pc_growth < 36
replace outlook = 3 if inlist(year, 2000, 2002) & proj_pc_growth >= 10 & proj_pc_growth < 21
replace outlook = 4 if inlist(year, 2000, 2002) & proj_pc_growth >=  3 & proj_pc_growth < 10
replace outlook = 5 if inlist(year, 2000, 2002) & proj_pc_growth >  -1 & proj_pc_growth <  3
replace outlook = 6 if inlist(year, 2000, 2002) & proj_pc_growth <= -1

/* 2004 (no code 5; exactly zero growth stays in band 4, any decrease is code 6) */
* NOTE(review): zero is assigned to band 4 on the reading that "decline"
* means a decrease of any amount -- confirm against the OOH definitions.
replace outlook = 1 if year == 2004 & proj_pc_growth >= 27 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2004 & proj_pc_growth >= 18 & proj_pc_growth < 27
replace outlook = 3 if year == 2004 & proj_pc_growth >=  9 & proj_pc_growth < 18
replace outlook = 4 if year == 2004 & proj_pc_growth >=  0 & proj_pc_growth <  9
replace outlook = 6 if year == 2004 & proj_pc_growth <   0

/* 2006 */
replace outlook = 1 if year == 2006 & proj_pc_growth >= 21 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2006 & proj_pc_growth >= 14 & proj_pc_growth < 21
replace outlook = 3 if year == 2006 & proj_pc_growth >=  7 & proj_pc_growth < 14
replace outlook = 4 if year == 2006 & proj_pc_growth >=  3 & proj_pc_growth <  7
replace outlook = 5 if year == 2006 & proj_pc_growth >  -3 & proj_pc_growth <  3
replace outlook = 7 if year == 2006 & proj_pc_growth > -10 & proj_pc_growth <= -3
replace outlook = 8 if year == 2006 & proj_pc_growth <= -10

/* 2008 */
replace outlook = 1 if year == 2008 & proj_pc_growth >= 20 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2008 & proj_pc_growth >= 14 & proj_pc_growth < 20
replace outlook = 3 if year == 2008 & proj_pc_growth >=  7 & proj_pc_growth < 14
replace outlook = 4 if year == 2008 & proj_pc_growth >=  3 & proj_pc_growth <  7
replace outlook = 5 if year == 2008 & proj_pc_growth >  -3 & proj_pc_growth <  3
replace outlook = 7 if year == 2008 & proj_pc_growth > -10 & proj_pc_growth <= -3
replace outlook = 8 if year == 2008 & proj_pc_growth <= -10

/* 2010 */
replace outlook = 1 if year == 2010 & proj_pc_growth >= 29 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2010 & proj_pc_growth >= 20 & proj_pc_growth < 29
replace outlook = 3 if year == 2010 & proj_pc_growth >= 10 & proj_pc_growth < 20
replace outlook = 4 if year == 2010 & proj_pc_growth >=  3 & proj_pc_growth < 10
replace outlook = 5 if year == 2010 & proj_pc_growth >  -3 & proj_pc_growth <  3
replace outlook = 7 if year == 2010 & proj_pc_growth > -10 & proj_pc_growth <= -3
replace outlook = 8 if year == 2010 & proj_pc_growth <= -10

/* 2012 */
replace outlook = 1 if year == 2012 & proj_pc_growth >= 22 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2012 & proj_pc_growth >= 15 & proj_pc_growth < 22
replace outlook = 3 if year == 2012 & proj_pc_growth >=  8 & proj_pc_growth < 15
replace outlook = 4 if year == 2012 & proj_pc_growth >=  3 & proj_pc_growth <  8
replace outlook = 5 if year == 2012 & proj_pc_growth >  -3 & proj_pc_growth <  3
replace outlook = 6 if year == 2012 & proj_pc_growth <= -3

/* 2014 */
replace outlook = 1 if year == 2014 & proj_pc_growth >= 14 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2014 & proj_pc_growth >=  9 & proj_pc_growth < 14
replace outlook = 3 if year == 2014 & proj_pc_growth >=  5 & proj_pc_growth <  9
replace outlook = 4 if year == 2014 & proj_pc_growth >=  2 & proj_pc_growth <  5
replace outlook = 5 if year == 2014 & proj_pc_growth >  -2 & proj_pc_growth <  2
replace outlook = 6 if year == 2014 & proj_pc_growth <= -2

/* 2016 */
replace outlook = 1 if year == 2016 & proj_pc_growth >= 15 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2016 & proj_pc_growth >= 10 & proj_pc_growth < 15
replace outlook = 3 if year == 2016 & proj_pc_growth >=  5 & proj_pc_growth < 10
replace outlook = 4 if year == 2016 & proj_pc_growth >=  2 & proj_pc_growth <  5
replace outlook = 5 if year == 2016 & proj_pc_growth >  -2 & proj_pc_growth <  2
replace outlook = 6 if year == 2016 & proj_pc_growth <= -2

/* 2018 */
replace outlook = 1 if year == 2018 & proj_pc_growth >= 11 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2018 & proj_pc_growth >=  7 & proj_pc_growth < 11
replace outlook = 3 if year == 2018 & proj_pc_growth >=  4 & proj_pc_growth <  7
replace outlook = 4 if year == 2018 & proj_pc_growth >=  2 & proj_pc_growth <  4
replace outlook = 5 if year == 2018 & proj_pc_growth >  -2 & proj_pc_growth <  2
replace outlook = 6 if year == 2018 & proj_pc_growth <= -2

/* 2019 */
replace outlook = 1 if year == 2019 & proj_pc_growth >=  8 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2019 & proj_pc_growth >=  5 & proj_pc_growth <  8
replace outlook = 3 if year == 2019 & proj_pc_growth >=  3 & proj_pc_growth <  5
replace outlook = 4 if year == 2019 & proj_pc_growth >=  1 & proj_pc_growth <  3
replace outlook = 5 if year == 2019 & proj_pc_growth >  -1 & proj_pc_growth <  1
replace outlook = 6 if year == 2019 & proj_pc_growth <= -1

/* 2020 */
replace outlook = 1 if year == 2020 & proj_pc_growth >= 16 & !missing(proj_pc_growth)
replace outlook = 2 if year == 2020 & proj_pc_growth >= 11 & proj_pc_growth < 16
replace outlook = 3 if year == 2020 & proj_pc_growth >=  6 & proj_pc_growth < 11
replace outlook = 4 if year == 2020 & proj_pc_growth >=  2 & proj_pc_growth <  6
replace outlook = 5 if year == 2020 & proj_pc_growth >  -2 & proj_pc_growth <  2
replace outlook = 6 if year == 2020 & proj_pc_growth <= -2


* Attach the value label named "outlook".
* NOTE(review): no -label define outlook- appears in this section; unless the
* label comes in with the loaded data, the codes will display unlabeled.
lab val outlook outlook

* Collapse the detailed codes into three groups.
recode outlook (1 2 3 = 1 "growth") (4 5 = 2 "stable") (6 7 8 = 3 "decline"), gen(outlook_gr)

keep year ipums2010 empsize projsize proj_pc_growth annual_mean annual_pct10 annual_pct90 emp_chng_rate emp_pc_growth outlook outlook_gr jobopening

* Drop rows that are identical on every kept variable.
* NOTE(review): jobopening is carried through without aggregation, so SOC rows
* within one ipums2010 x year that differ on jobopening survive this step and
* the saved file may not be unique on ipums2010 x year -- confirm before using
* it as the "1" side of a merge.
duplicates drop _all, force

save "${temp}/oews_matrix_ooh2000-2020_ipums_occ.dta", replace

